* ---------------------------------------------------------------------------
* Sample A: rows with HS==1 and ABS==0 (original note: people who have an
* accident). d is rebuilt from ABSd_real in the correction file and only the
* d==2 rows are stored in tempfile d2, which is appended further below.
* ---------------------------------------------------------------------------
use "$out/IVdata_accidents_bnoSext2_SL5_2023Nov22.dta", clear
keep if HS==1 & ABS==0
* the d shipped with this file is not kept; it is replaced by ABSd_real
keep anon tmerge_t t maxt_clue mint_clu flagEMPml1 flagEMPm0 ABS HS ferfi kor
merge 1:1 anon using "$out/correct_d_for_HSnoABS.dta", keepusing(ABSd_real) keep(master match) nogenerate
rename ABSd_real d
* stops the run if a row has no corrected d or a value other than 1 or 2
assert inlist(d, 1, 2)
keep if d==2
tempfile d2
save `d2'

* Sample B: rows with HS==1 and ABS==1
* (original notes: accident sample, those who have an ABS spell after ACC)
use "$out/data_for_main_regs_20231122.dta", clear
keep if HS==1 & ABS==1
keep anon mint_clu maxt_clu d ABS HS tmerge_t flagEMPml1 flagEMPm0 ferfi kor t

* stack the d==2 rows saved in tempfile d2 underneath
append using `d2'
tabulate d
* park the sample's own t under another name; the next merge brings in a t per observed month
rename t ttemp
	
* this is the universe of treated, bring in some admin data for all the periods they are observed in Admin3 *
* (1:m on anon: one master row per person fans out to one row per month t found in admin3_alap)
merge 1:m anon using "$in/admin3_alap.dta", nogen keepusing(ev t w1 vallazon wh1 fogvisz1) keep(master match)
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogen
rename passziv_tip1 SLdays

* for each t, determine for a given anon if having the ACC or not yet *
* treated = 1 in the month equal to maxt_clu, 0 while t is still before mint_clu, missing otherwise
gen treated = 1 if t==maxt_clu
	replace treated = 0 if mint_clu>t
	// not yet treated //

* for each t, determine how long in the future a given anon will be treated *
gen time_remaining = mint_clu-t
// time remaining until treatment

* for each t and anon, determine if the anon-t pair is far enough in the future to consider it as a control *
* FIX: Stata orders missing values above every number, so a bare
* "time_remaining>=36" is also true when time_remaining is missing (t or
* mint_clu missing, e.g. persons without any admin3_alap match). Such rows
* were flagged as controls; they are excluded explicitly now.
gen control = 1 if time_remaining>=36 & !missing(time_remaining)
	recode control (.=0)

drop SLdays vallazon wh1 fogvisz1
xtset anon t
* control rows get d==1 (asserted again right before the final save)
replace d=1 if control==1
* variable name written out in full so the line does not rely on abbreviation
keep if control==1 | treated==1

* maxtflag: for treated rows the time of the ACC (maxt_clu); for control rows the row's own t
gen maxtflag = cond(control==1, t, maxt_clu) if control==1 | treated==1

* accident-specific fields are blanked on control rows
foreach accvar in flagEMPml1 flagEMPm0 mint_clu maxt_clu {
	replace `accvar' = . if control==1
}

* we can only look between t_shock 85-156 (January 2010 - December 2015), to allow for 1 year pre-time and 3 years post-time (=65 months, 5.5 years)
drop if maxtflag<85 | maxtflag>=157

* controls: the pseudo-shock month is the row's own t
replace tmerge_t = t if control==1
* treated: restore the t that came with the sample (parked in ttemp)
replace t = ttemp if treated==1
drop ttemp maxtflag

* one id per anon-t pair
egen id = group(anon t)

* run data construction code from here *

* calendar month (1-12) implied by t; a remainder of 0 stands for month 12
gen month_shock = t-12*int(t/12)
replace month_shock = 12 if month_shock==0
gen month_return = month_shock+d
* set to 1 when month_shock+d runs past month 12, missing otherwise
gen diff_year = 1 if month_return>=13

keep anon tmerge_t t d maxt_clue mint_clu flagEMPml1 flagEMPm0 diff_year treated control ferfi kor id

    
*tmerge_t        month of the shock 
*t		month of merging firm-level data in m=0 or m=-1 (=tmerge_t if flagEMPm0==1; =tmerge_t-1 if flagEMPml1==1)
*maxt_clue       last month of the clustered accident (t in which m=0 for the last time)
*mint_clu        first month of the clustered accident (t in which m=0 for the first time)
*flagEMPml1      indicator for treated being EMP in one month before the accident
*flagEMPm0       indicator for treated being EMP in the month of the accident

* job and occupation information observed in month t
merge 1:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 fogvisz1 feor1_h2 teaor1_h1) keep(master match) nogenerate
merge 1:1 anon t using "$in/admin3_feor.dta", keepusing(feor1_2003_4 feor1_2008_4) keep(master match) nogenerate

* add controls and outcome variables
* outcomes: lnw1_restr FEj - in relative_time 0,1,2,3
* additional outcomes: FO mean_tfp_wr - in relative time 0,1,2,3
* controls: kor occupation teaor1_h1 ferfi mean_lag_w log_health_12mo A_12mo-V_12mo FEi_imp flag_missingFEi size_pre_imp FO_pre_imp logmean_w_pre_imp flag_missingsize_pre flag_missingFO_pre flag_missinglogmean_w_pre	flag_missingFEj_l4 - FEj_l12_imp
* one row is an anon-t-d triplet

* person-level file from the AKM output folder; anon_fix is stored as FEi
merge m:1 anon using "$out_AKM/FEi_full_Jan2023.dta"
tabulate _merge
drop if _merge==2
drop _merge
rename anon_fix FEi

* step 1: add the t-related

* assign occupation in t
* occupation groups built from feor1_h2_t; codes not listed below (e.g. 16) stay missing
rename feor1_h2 feor1_h2_t
gen occupation = .
replace occupation = 0 if missing(feor1_h2_t)
replace occupation = 1 if inlist(feor1_h2_t, 14, 15)
replace occupation = 3 if feor1_h2_t==11 | inrange(feor1_h2_t, 17, 23)
replace occupation = 4 if feor1_h2_t==12 | inrange(feor1_h2_t, 24, 33)
replace occupation = 5 if inrange(feor1_h2_t, 34, 43)
replace occupation = 6 if inrange(feor1_h2_t, 44, 47)
replace occupation = 7 if feor1_h2_t==13 | feor1_h2_t==48
*drop feor1_h2_t
rename vallazon1 vallazon
* year implied by t (t==1 falls in 2003, twelve months per year)
gen ev = 2003+int((t-1)/12)
merge m:1 vallazon ev using "$out/firmquality.dta", keepusing(letszam FO mean_firmw mean_tfp_wr) keep(master match) nogenerate
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* tag the month-t measurements with _t, shorten two stubs, then store the firm-level ones as _pre
rename (mean_firmw FO letszam mean_tfp_wr feor1_2003_4 feor1_2008_4) (mean_firmw_t FO_t letszam_t mean_tfp_wr_t feor1_2003_4_t feor1_2008_4_t)
rename *mean_firmw* *mean_w*
rename *letszam* *size*
rename (size_t FO_t mean_w_t mean_tfp_wr_t) (size_pre FO_pre mean_w_pre mean_tfp_wr_pre)

* FO and mean_tfp_wr are additionally kept under their _t names
gen FO_t = FO_pre
gen mean_tfp_wr_t = mean_tfp_wr_pre
gen logmean_w_pre = log(mean_w_pre)
drop mean_w_pre

* add l1-l3 values of restricted wage, firm FE, hours worked and additional wage variables

* four copies of every row; the original month is kept as t_pre and the new
* t runs over tmerge_t-3, ..., tmerge_t within each n
gen n=_n
expand 4
rename t t_pre
egen t=seq(), by(n)
replace t=t-4
replace t=t +tmerge_t

* EMP is built from the month-t_pre values merged earlier: firm id present,
* positive wage or hours, SLdays below 6 or missing, and fogvisz1==201
gen EMP=1 if (vallazon!=.) & ((w1!=. & w1>0) | (wh1!=. & wh1>0)) & (SLdays<6 | SLdays==.) & (fogvisz1==201)
drop SLdays

* create a winsorized version of the wage variable, only for the EMP=1
* FIX: w1 is still the wage observed in month t_pre, while t was just rebuilt
* as a lag index. Grouping the percentiles by(t) pooled wages from rows whose
* tmerge_t differs by up to three months and gave the four copies of a row
* different cut-offs. The cut-offs are now taken within the month the wage
* belongs to, matching the later by(t) blocks where w1 is re-merged at t.
gen w1_wins=w1
egen wp1=pctile(w1) if EMP==1, by(t_pre) p(1)
egen wp99=pctile(w1) if EMP==1, by(t_pre) p(99)
replace w1_wins=wp1 if w1_wins<wp1 & w1_wins!=0 & EMP==1
replace w1_wins=wp99 if w1_wins>wp99 & w1_wins!=. & EMP==1
drop wp1 wp99
drop EMP

* free the plain names for the lagged values merged next
foreach X in vallazon w1 w1_wins wh1 fogvisz1{
	rename `X' `X'_t
}

* job, firm FE and passziv information for each lag month t
merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* deflated wage and hours worked changed to missing if EMP!=1 that period
* EMP==1: firm id present, positive wage or hours, SLdays below 6 or missing, fogvisz1==201
gen EMP = 1 if fogvisz1==201 & vallazon!=. & (SLdays<6 | SLdays==.) & ((w1>0 & w1!=.) | (wh1>0 & wh1!=.))

* diagnostics on the month before tmerge_t (display only)
tabulate EMP if t==tmerge_t-1, missing
tabulate treated if EMP==. & t==tmerge_t-1
xtset id t
tabulate treated if EMP==. & l.EMP==. & l2.EMP==. & t==tmerge_t-1

* create a winsorized version of the firm FE variable, only for the EMP=1
* cut-offs are the 1st/99th percentiles over distinct (vallazon, FEj) pairs seen with EMP==1
preserve
	keep if EMP==1
	keep vallazon FEj
	duplicates drop
	foreach p in 1 99 {
		egen FEj`p' = pctile(FEj), p(`p')
	}
	gen FEj_wins = FEj
	replace FEj_wins = FEj1 if FEj_wins<FEj1 & FEj_wins!=0
	replace FEj_wins = FEj99 if FEj_wins>FEj99 & FEj_wins!=.
	keep vallazon FEj_wins
	tempfile FEjwins
	save `FEjwins'
restore
* rows whose firm never appears with EMP==1 get no FEj_wins
merge m:1 vallazon using `FEjwins', keep(master match) nogenerate

* create a winsorized version of the wage variable, only for the EMP=1
* (1st/99th percentiles of w1 within month t among EMP==1 rows)
gen w1_wins = w1
foreach p in 1 99 {
	egen wp`p' = pctile(w1) if EMP==1, by(t) p(`p')
}
replace w1_wins = wp1 if EMP==1 & w1_wins!=0 & w1_wins<wp1
replace w1_wins = wp99 if EMP==1 & w1_wins!=. & w1_wins>wp99
drop wp1 wp99

* year of the lag month
replace ev = 2003+int((t-1)/12)

* yearly deflator, 2003 = 1; years outside 2003-2017 stay missing
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev==`yr'
	local ++yr
}

* deflate the wage variable; wages and hours are kept only where EMP==1
gen w1_defl = w1/defl if EMP==1
gen w1_wins_defl = w1_wins/defl if EMP==1
replace wh1 = . if EMP!=1

* censoring hours worked to the range 20-40
replace wh1 = 20 if wh1<20
replace wh1 = 40 if wh1>40 & wh1!=.

* creating month of the year (a remainder of 0 stands for month 12)
gen ho = t-int(t/12)*12
replace ho = 12 if ho==0

* alternative wage variables
*
* For each wage version y ("" = raw, _wins = winsorized) and each day count
* x ("" -> nap, j -> jnap): w<y>_<x>nap = deflated wage / (days / length of
* the calendar month). Where the day count is missing the deflated wage is
* used unscaled.
* FIX: February has 29 days in every leap year. The former hard-coded list
* (2004, 2008, 2012) left out 2016 although the deflator table runs to 2017;
* mod(ev,4)==0 is correct for all years 1901-2099.
tempvar mdays
gen `mdays' = 31 if inlist(ho, 1, 3, 5, 7, 8, 10, 12)
replace `mdays' = 30 if inlist(ho, 4, 6, 9, 11)
replace `mdays' = 28 if ho==2
replace `mdays' = 29 if ho==2 & mod(ev, 4)==0

foreach y in "" _wins{
	foreach x in "" j{
		gen w`y'_`x'nap = w1`y'_defl/(`x'nap/`mdays')
		replace w`y'_`x'nap = w1`y'_defl if `x'nap==.
	}

	* log wage measures; all but _totw are left missing when transfer_mtp==401 or transfer_ny==203
	gen lnw1`y'_restr=ln((w1`y'_defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_adj=ln((w`y'_nap)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_nap=ln(w`y'_nap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_jnap=ln(w`y'_jnap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_totw=ln(w1`y'_defl)

	* _t0 = mean over the months before tmerge_t, copied to every row of n
	foreach X in lnw1`y'_restr lnw1`y'_adj lnw1`y'_nap lnw1`y'_jnap lnw1`y'_totw FEj`y'{
		egen `X'_t_=mean(`X') if t<tmerge_t, by(n)
		egen `X'_t0=min(`X'_t_), by(n)
		drop `X'_t_
	}
}
* the helper must not survive into later reshapes or the saved file
drop `mdays'

* hours worked: mean over the months before tmerge_t, copied to every row of n
foreach X in wh1{
	egen `X'_t_=mean(`X') if t<tmerge_t, by(n)
	egen `X'_t0=min(`X'_t_), by(n)
	drop `X'_t_
}

* back to one row per n
keep if t==tmerge_t
* FIX: FEj_wins is dropped as well. It used to stay in memory, and because
* merge never overwrites a variable that already exists in the master data,
* the l4-l12 block below kept this month-tmerge_t value on every lag row
* instead of picking up the lag-specific winsorized firm FE.
drop t nap jnap vallazon w1 w1_wins wh1 fogvisz1 transfer_ny transfer_mtp FEj FEj_wins SLdays defl w1_defl w1_wins_defl EMP ho w_nap w_jnap lnw1_restr lnw1_adj lnw1_nap lnw1_jnap lnw1_totw w_wins_nap w_wins_jnap lnw1_wins_restr lnw1_wins_adj lnw1_wins_nap lnw1_wins_jnap lnw1_wins_totw 
sum lnw1_restr_t0 lnw1_adj_t0 lnw1_nap_t0 lnw1_jnap_t0 lnw1_totw_t0 lnw1_wins_restr_t0 lnw1_wins_adj_t0 lnw1_wins_nap_t0 lnw1_wins_jnap_t0 lnw1_wins_totw_t0 FEj_t0 wh1_t0 FEj_wins_t0

* add l4-l12 values of lnw1_restr and FEj

* ten copies per n: copy 1 stays at tmerge_t, copies 2-10 sit at tmerge_t-12 ... tmerge_t-4
expand 10
egen t = seq(), by(n)
replace t = tmerge_t+cond(t==1, 0, t-14)

merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* EMP==1: firm id present, positive wage or hours, SLdays below 6 or missing, fogvisz1==201
gen EMP = 1 if fogvisz1==201 & vallazon!=. & (SLdays<6 | SLdays==.) & ((w1>0 & w1!=.) | (wh1>0 & wh1!=.))

* winsorized firm FE: 1st/99th percentiles over distinct (vallazon, FEj) pairs seen with EMP==1
preserve
	keep if EMP==1
	keep vallazon FEj
	duplicates drop
	foreach p in 1 99 {
		egen FEj`p' = pctile(FEj), p(`p')
	}
	gen FEj_wins = FEj
	replace FEj_wins = FEj1 if FEj_wins<FEj1 & FEj_wins!=0
	replace FEj_wins = FEj99 if FEj_wins>FEj99 & FEj_wins!=.
	keep vallazon FEj_wins
	tempfile FEjwins
	save `FEjwins'
restore
* NOTE(review): merge keeps the master's values for a variable that already exists,
* so FEj_wins must not be in memory at this point - confirm it was dropped upstream
merge m:1 vallazon using `FEjwins', keep(master match) nogenerate

* create a winsorized version of the wage variable, only for the EMP=1
* (1st/99th percentiles of w1 within month t among EMP==1 rows)
gen w1_wins = w1
foreach p in 1 99 {
	egen wp`p' = pctile(w1) if EMP==1, by(t) p(`p')
}
replace w1_wins = wp1 if EMP==1 & w1_wins!=0 & w1_wins<wp1
replace w1_wins = wp99 if EMP==1 & w1_wins!=. & w1_wins>wp99
drop wp1 wp99

* year of the lag month
replace ev = 2003+int((t-1)/12)

* yearly deflator, 2003 = 1; years outside 2003-2017 stay missing
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev==`yr'
	local ++yr
}

* censoring hours worked to the range 20-40
replace wh1 = 20 if wh1<20
replace wh1 = 40 if wh1>40 & wh1!=.

* log deflated wage over (wh1*30)/7; missing unless EMP==1 and neither transfer code applies
foreach s in "" _wins {
	gen lnw1`s'_restr = log((w1`s'/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203 & EMP==1
}
drop EMP

* mean over the months before tmerge_t, copied to every row of n
foreach s in "" _wins {
	egen mean_lag_w`s'_ = mean(lnw1`s'_restr) if t<tmerge_t, by(n)
	egen mean_lag_w`s' = min(mean_lag_w`s'_), by(n)
	drop mean_lag_w`s'_
}
drop w1 w1_wins wh1 transfer_ny transfer_mtp lnw1_restr lnw1_wins_restr vallazon nap jnap fogvisz1 SLdays

* missing firm FE: flag the row and fill in the mean over all rows currently in memory
foreach fe of varlist FEj FEj_wins {
	gen flag_missing`fe' = (`fe' == .)
	gen `fe'_imp = `fe'
	summarize `fe'
	replace `fe'_imp = `r(mean)' if flag_missing`fe' == 1
	drop `fe'
}

* months before tmerge_t (0 for the tmerge_t copy); becomes the wide suffix
gen time = tmerge_t-t
rename (flag_missingFEj flag_missingFEj_wins FEj_imp FEj_wins_imp) (flag_missingFEj_l flag_missingFEj_wins_l FEj_imp_l FEj_wins_imp_l)
drop ev defl t

* back to one row per n with one column per lag
reshape wide flag_missingFEj_l FEj_imp_l FEj_wins_imp_l flag_missingFEj_wins_l, i(n) j(time)
rename FEj_imp_l* FEj_l*_imp
rename FEj_wins_imp_l* FEj_wins_l*_imp

* same treatment for the pre-period covariates: missing flag plus mean imputation, original dropped
foreach cov of varlist size_pre FO_pre logmean_w_pre mean_tfp_wr_pre FEi {
	gen flag_missing`cov' = (`cov' == .)
	gen `cov'_imp = `cov'
	summarize `cov'
	replace `cov'_imp = `r(mean)' if flag_missing`cov' == 1
	drop `cov'
}

* add quarterly l1-l24 values of wage and firm FE

* 25 copies per n: copy 1 stays at tmerge_t, copies 2-25 sit at tmerge_t-24 ... tmerge_t-1
expand 25
egen t = seq(), by(n)
replace t = tmerge_t+cond(t==1, 0, t-26)

merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* EMP==1: firm id present, positive wage or hours, SLdays below 6 or missing, fogvisz1==201
gen EMP = 1 if fogvisz1==201 & vallazon!=. & (SLdays<6 | SLdays==.) & ((w1>0 & w1!=.) | (wh1>0 & wh1!=.))

* winsorized firm FE: 1st/99th percentiles over distinct (vallazon, FEj) pairs seen with EMP==1
preserve
	keep if EMP==1
	keep vallazon FEj
	duplicates drop
	foreach p in 1 99 {
		egen FEj`p' = pctile(FEj), p(`p')
	}
	gen FEj_wins = FEj
	replace FEj_wins = FEj1 if FEj_wins<FEj1 & FEj_wins!=0
	replace FEj_wins = FEj99 if FEj_wins>FEj99 & FEj_wins!=.
	keep vallazon FEj_wins
	tempfile FEjwins
	save `FEjwins'
restore
merge m:1 vallazon using `FEjwins', keep(master match) nogenerate

* create a winsorized version of the wage variable, only for the EMP=1
* (1st/99th percentiles of w1 within month t among EMP==1 rows)
gen w1_wins = w1
foreach p in 1 99 {
	egen wp`p' = pctile(w1) if EMP==1, by(t) p(`p')
}
replace w1_wins = wp1 if EMP==1 & w1_wins!=0 & w1_wins<wp1
replace w1_wins = wp99 if EMP==1 & w1_wins!=. & w1_wins>wp99
drop wp1 wp99

* year of month t; created if absent, overwritten otherwise
capture generate ev = 2003+int((t-1)/12)
replace ev = 2003+int((t-1)/12)

* yearly deflator, 2003 = 1; years outside 2003-2017 stay missing
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev==`yr'
	local ++yr
}

* censoring hours worked to the range 20-40
replace wh1 = 20 if wh1<20
replace wh1 = 40 if wh1>40 & wh1!=.

* log deflated wage over (wh1*30)/7; missing unless EMP==1 and neither transfer code applies
foreach X in "" _wins {
	gen lnw1`X'_restr = log((w1`X'/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203 & EMP==1
}
drop EMP

* create _lq1-_lq8 for the quarterly lagged averages
* quarter Y covers the months tmerge_t-3Y ... tmerge_t-(3Y-2)

foreach X in "" _wins {
	forvalues Y = 1/8 {
		local near = 3*(`Y'-1)+1
		local far = 3*`Y'

		* mean log wage in the quarter, copied to every row of n
		egen mean_lq`Y'_w`X'_ = mean(lnw1`X'_restr) if t<=tmerge_t-`near' & t>=tmerge_t-`far', by(n)
		egen mean_lq`Y'_w`X' = min(mean_lq`Y'_w`X'_), by(n)
		drop mean_lq`Y'_w`X'_

		* mean firm FE in the quarter, then missing flag and mean imputation
		egen mean_lq`Y'_FEj`X'_ = mean(FEj`X') if t<=tmerge_t-`near' & t>=tmerge_t-`far', by(n)
		egen mean_lq`Y'_FEj`X' = min(mean_lq`Y'_FEj`X'_), by(n)
		drop mean_lq`Y'_FEj`X'_
		gen flag_missingFEj`X'_lq`Y' = (mean_lq`Y'_FEj`X' == .)
		gen mean_lq`Y'_FEj`X'_imp = mean_lq`Y'_FEj`X'
		summarize mean_lq`Y'_FEj`X'
		replace mean_lq`Y'_FEj`X'_imp = `r(mean)' if flag_missingFEj`X'_lq`Y' == 1
		drop mean_lq`Y'_FEj`X'
	}
}
drop w1 w1_wins wh1 transfer_ny transfer_mtp lnw1_restr lnw1_wins_restr vallazon nap jnap fogvisz1 SLdays FEj FEj_wins

* back to one row per n
keep if t==tmerge_t
drop ev defl t



* add l3-l12 values of health-related variables

* eleven copies per n: copy 1 stays at tmerge_t, copies 2-11 sit at tmerge_t-12 ... tmerge_t-3
expand 11
egen t = seq(), by(n)
replace t = tmerge_t+cond(t==1, 0, t-14)

merge m:1 anon t using "$in/admin3_eu_fekvo.dta", keepusing(fekvo_ft) keep(master match) nogenerate
merge m:1 anon t using "$in/admin3_eu_jaro.dta", keepusing(jaro_ft_ossz) keep(master match) nogenerate
merge m:1 anon t using "$in/admin3_eu_veny.dta", keepusing(tbtam_ossz betegft_ossz) keep(master match) nogenerate
merge m:1 anon t using "$in_med/admin3_eu_veny_H2_v2.dta", keepusing(A_ft B_ft C_ft D_ft G_ft H_ft J_ft L_ft M_ft N_ft P_ft R_ft S_ft V_ft) keep(master match) nogenerate

* months without a record count as zero
foreach item of varlist fekvo_ft jaro_ft_ossz tbtam_ossz betegft_ossz {
	replace `item' = 0 if `item'==.
}
* sum of the four items over the months before tmerge_t, then its log (0 where the log is undefined)
gen health = fekvo_ft+jaro_ft_ossz+tbtam_ossz+betegft_ossz
egen health_12mo_ = total(health) if t<tmerge_t, by(n)
egen health_12mo = min(health_12mo_), by(n)
gen log_health_12mo = log(health_12mo)
replace log_health_12mo = 0 if log_health_12mo==.
drop health_12mo health_12mo_ fekvo_ft jaro_ft_ossz tbtam_ossz betegft_ossz

* per group letter: the largest value seen before tmerge_t after recoding
* missing to 0 and positive amounts to 1
foreach grp in A B C D G H J L M N P R S V {
	replace `grp'_ft = 0 if `grp'_ft==.
	replace `grp'_ft = 1 if `grp'_ft>0 & `grp'_ft~=.
	egen `grp'_12mo_ = max(`grp'_ft) if t<tmerge_t, by(n)
	egen `grp'_12mo = min(`grp'_12mo_), by(n)
	drop `grp'_12mo_ `grp'_ft
}

* back to one row per n
keep if t==tmerge_t

drop t n flag_missingFEj_l0 FEj_l0_imp health

* step 2: add the t+d-related

* one row is an anon-t-d triplet

* 14 copies per row at offsets 0, 6, 8, ..., 30 from the reference month,
* which is maxt_clue+d where maxt_clue is available and tmerge_t+d otherwise
gen n = _n
expand 14
egen t = seq(), by(n)
replace t = cond(t==1, 0, t*2+2)
replace t = cond(maxt_clue==., tmerge_t, maxt_clue)+d+t

merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1 feor1_h2) keep(master match) nogenerate
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays
merge m:1 anon t using "$in/admin3_feor.dta", keepusing(feor1_2003_4 feor1_2008_4) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj

* deflated wage and hours worked changed to missing if EMP!=1 that period
* EMP==1: firm id present, positive wage or hours, SLdays below 6 or missing, fogvisz1==201
gen EMP = 1 if fogvisz1==201 & vallazon!=. & (SLdays<6 | SLdays==.) & ((w1>0 & w1!=.) | (wh1>0 & wh1!=.))
* display only: rows without EMP in the reference month, by treated
tabulate treated if EMP==. & ((t==tmerge_t+d & maxt_clue==.) | (t==maxt_clue+d & maxt_clue!=.))

* winsorized firm FE: 1st/99th percentiles over distinct (vallazon, FEj) pairs seen with EMP==1
preserve
	keep if EMP==1
	keep vallazon FEj
	duplicates drop
	foreach p in 1 99 {
		egen FEj`p' = pctile(FEj), p(`p')
	}
	gen FEj_wins = FEj
	replace FEj_wins = FEj1 if FEj_wins<FEj1 & FEj_wins!=0
	replace FEj_wins = FEj99 if FEj_wins>FEj99 & FEj_wins!=.
	keep vallazon FEj_wins
	tempfile FEjwins
	save `FEjwins'
restore
merge m:1 vallazon using `FEjwins', keep(master match) nogenerate


* create a winsorized version of the wage variable, only for the EMP=1
* (1st/99th percentiles of w1 within month t among EMP==1 rows)
gen w1_wins = w1
foreach p in 1 99 {
	egen wp`p' = pctile(w1) if EMP==1, by(t) p(`p')
}
replace w1_wins = wp1 if EMP==1 & w1_wins!=0 & w1_wins<wp1
replace w1_wins = wp99 if EMP==1 & w1_wins!=. & w1_wins>wp99
drop wp1 wp99

* year of month t and the yearly deflator (2003 = 1; years outside 2003-2017 stay missing)
gen ev = 2003+int((t-1)/12)
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev==`yr'
	local ++yr
}

* deflate the wage variable; wages and hours are kept only where EMP==1
gen w1_defl = w1/defl if EMP==1
gen w1_wins_defl = w1_wins/defl if EMP==1
replace wh1 = . if EMP!=1

* censoring hours worked to the range 20-40
replace wh1 = 20 if wh1<20
replace wh1 = 40 if wh1>40 & wh1!=.

* creating month of the year (a remainder of 0 stands for month 12)
gen ho = t-int(t/12)*12
replace ho = 12 if ho==0

* alternative wage variables
*
* For each wage version y ("" = raw, _wins = winsorized) and each day count
* x ("" -> nap, j -> jnap): w<y>_<x>nap = deflated wage / (days / length of
* the calendar month). Where the day count is missing the deflated wage is
* used unscaled.
* FIX: the post-return months run into 2016-2017 (the deflator table covers
* them), but the hard-coded leap-year list stopped at 2012, so February 2016
* was scaled with 28 instead of 29 days. mod(ev,4)==0 is correct for all
* years 1901-2099.
tempvar mdays
gen `mdays' = 31 if inlist(ho, 1, 3, 5, 7, 8, 10, 12)
replace `mdays' = 30 if inlist(ho, 4, 6, 9, 11)
replace `mdays' = 28 if ho==2
replace `mdays' = 29 if ho==2 & mod(ev, 4)==0

foreach y in "" _wins{
	foreach x in "" j{
		gen w`y'_`x'nap = w1`y'_defl/(`x'nap/`mdays')
		replace w`y'_`x'nap = w1`y'_defl if `x'nap==.
	}

	* log wage measures; all but _totw are left missing when transfer_mtp==401 or transfer_ny==203
	gen lnw1`y'_restr=ln((w1`y'_defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_adj=ln((w`y'_nap)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_nap=ln(w`y'_nap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_jnap=ln(w`y'_jnap) if transfer_mtp!=401 & transfer_ny!=203
	gen lnw1`y'_totw=ln(w1`y'_defl)
}
* the helper must not survive into later reshapes or the saved file
drop `mdays'

* months since the reference month (maxt_clue+d where maxt_clue is available, tmerge_t+d otherwise)
gen rel_time = t-d-cond(maxt_clue==., tmerge_t, maxt_clue)
tabulate rel_time, missing

* all ten log wage measures, raw first and winsorized second
local outcomes
foreach y in "" _wins {
	foreach stem in restr adj nap jnap totw {
		local outcomes `outcomes' lnw1`y'_`stem'
	}
}

* _td1y: mean over rel_time 6-18, _td2y: mean over rel_time 18-30; both copied to every row of n
foreach X in `outcomes' FEj wh1 FEj_wins {
	forvalues k = 1/2 {
		local lo = 12*`k'-6
		local hi = 12*`k'+6
		egen `X'_td`k'y_ = mean(`X') if rel_time>=`lo' & rel_time<=`hi', by(n)
		egen `X'_td`k'y = min(`X'_td`k'y_), by(n)
		drop `X'_td`k'y_
	}
}

* 4-digit occupation at the time of returning
* (the same construction is applied to feor1_h2 and vallazon)
*
* _t1: value at rel_time 0
* _t2: value at rel_time 12; if missing, the first non-missing of the values
*      2 back, 2 ahead, 4 back, 4 ahead, 6 back, 6 ahead (in that order)
* _t3: the same around rel_time 24
* Each result is copied to every row of n.

xtset id t
local sources feor1_2003_4 feor1_2008_4 feor1_h2 vallazon
local stubs   feor1_2003   feor1_2008   feor1_h2 vallazon
forvalues i = 1/4 {
	local src : word `i' of `sources'
	local out : word `i' of `stubs'

	gen `out'_t1_ = `src' if rel_time==0
	egen `out'_t1 = max(`out'_t1_), by(n)
	drop `out'_t1_

	local k = 2
	foreach h in 12 24 {
		gen `out'_t`k'_ = `src' if rel_time==`h'
		foreach shift in l2 f2 l4 f4 l6 f6 {
			replace `out'_t`k'_ = `shift'.`src' if `out'_t`k'_==. & rel_time==`h'
		}
		egen `out'_t`k' = max(`out'_t`k'_), by(n)
		drop `out'_t`k'_
		local ++k
	}
}


* mean tfp, FO upon return to work

merge m:1 vallazon ev using "$out/firmquality.dta", keepusing(FO mean_tfp_wr) keep(master match) nogenerate
xtset id t

* _t1: value at rel_time 0; _t2 / _t3: value at rel_time 12 / 24, falling back to
* 2 back, 2 ahead, 4 back, 4 ahead, 6 back, 6 ahead (in that order) when missing
foreach X in FO mean_tfp_wr {
	gen `X'_t1_ = `X' if rel_time==0
	egen `X'_t1 = max(`X'_t1_), by(n)
	drop `X'_t1_

	local k = 2
	foreach h in 12 24 {
		gen `X'_t`k'_ = `X' if rel_time==`h'
		foreach shift in l2 f2 l4 f4 l6 f6 {
			replace `X'_t`k'_ = `shift'.`X' if `X'_t`k'_==. & rel_time==`h'
		}
		egen `X'_t`k' = max(`X'_t`k'_), by(n)
		drop `X'_t`k'_
		local ++k
	}
}

* back to one row per n: the reference-month row
keep if rel_time==0

* reference-month value -> _t1, 6-18 month mean -> _t2, 18-30 month mean -> _t3
local outcomes
foreach y in "" _wins {
	foreach stem in restr adj nap jnap totw {
		local outcomes `outcomes' lnw1`y'_`stem'
	}
}
foreach X in `outcomes' FEj wh1 FEj_wins {
	rename (`X' `X'_td1y `X'_td2y) (`X'_t1 `X'_t2 `X'_t3)
}

drop n t nap jnap vallazon ev w1 w1_wins fogvisz1 transfer_ny transfer_mtp SLdays feor1_2003_4 feor1_2008_4 defl w1_defl w1_wins_defl EMP rel_time ho w_nap w_jnap w_wins_nap w_wins_jnap FO mean_tfp_wr


* add quarterly f1-f24 values of wage and firm FE after upon return to work

* 25 copies per row; tempt = 0..24 is the offset from the reference month
* (maxt_clue+d where maxt_clue is available, tmerge_t+d otherwise)
gen n = _n
expand 25
egen t = seq(), by(n)
replace t = t-1
gen tempt = t
replace t = cond(maxt_clue==., tmerge_t, maxt_clue)+d+t

merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* EMP==1: firm id present, positive wage or hours, SLdays below 6 or missing, fogvisz1==201
gen EMP = 1 if fogvisz1==201 & vallazon!=. & (SLdays<6 | SLdays==.) & ((w1>0 & w1!=.) | (wh1>0 & wh1!=.))

* winsorized firm FE: 1st/99th percentiles over distinct (vallazon, FEj) pairs seen with EMP==1
preserve
	keep if EMP==1
	keep vallazon FEj
	duplicates drop
	foreach p in 1 99 {
		egen FEj`p' = pctile(FEj), p(`p')
	}
	gen FEj_wins = FEj
	replace FEj_wins = FEj1 if FEj_wins<FEj1 & FEj_wins!=0
	replace FEj_wins = FEj99 if FEj_wins>FEj99 & FEj_wins!=.
	keep vallazon FEj_wins
	tempfile FEjwins
	save `FEjwins'
restore
merge m:1 vallazon using `FEjwins', keep(master match) nogenerate

* create a winsorized version of the wage variable, only for the EMP=1
* (1st/99th percentiles of w1 within month t among EMP==1 rows)
gen w1_wins = w1
foreach p in 1 99 {
	egen wp`p' = pctile(w1) if EMP==1, by(t) p(`p')
}
replace w1_wins = wp1 if EMP==1 & w1_wins!=0 & w1_wins<wp1
replace w1_wins = wp99 if EMP==1 & w1_wins!=. & w1_wins>wp99
drop wp1 wp99

* year of month t; created if absent, overwritten otherwise
capture generate ev = 2003+int((t-1)/12)
replace ev = 2003+int((t-1)/12)

* yearly deflator, 2003 = 1; years outside 2003-2017 stay missing
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev==`yr'
	local ++yr
}

* censoring hours worked to the range 20-40
replace wh1 = 20 if wh1<20
replace wh1 = 40 if wh1>40 & wh1!=.

* log deflated wage over (wh1*30)/7; missing unless EMP==1 and neither transfer code applies
foreach X in "" _wins {
	gen lnw1`X'_restr = log((w1`X'/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203 & EMP==1
}
drop EMP

* create _fq1-_fq8 for the quarterly forward averages
* quarter Y covers the offsets tempt = 3Y-2 ... 3Y from the reference month

foreach X in "" _wins {
	forvalues Y = 1/8 {
		local first = 3*(`Y'-1)+1
		local last = 3*`Y'

		* mean log wage in the quarter, copied to every row of n
		egen mean_fq`Y'_w`X'_ = mean(lnw1`X'_restr) if tempt>=`first' & tempt<=`last', by(n)
		egen mean_fq`Y'_w`X' = min(mean_fq`Y'_w`X'_), by(n)
		drop mean_fq`Y'_w`X'_

		* mean firm FE in the quarter, then missing flag and mean imputation
		egen mean_fq`Y'_FEj`X'_ = mean(FEj`X') if tempt>=`first' & tempt<=`last', by(n)
		egen mean_fq`Y'_FEj`X' = min(mean_fq`Y'_FEj`X'_), by(n)
		drop mean_fq`Y'_FEj`X'_
		gen flag_missingFEj`X'_fq`Y' = (mean_fq`Y'_FEj`X' == .)
		gen mean_fq`Y'_FEj`X'_imp = mean_fq`Y'_FEj`X'
		summarize mean_fq`Y'_FEj`X'
		replace mean_fq`Y'_FEj`X'_imp = `r(mean)' if flag_missingFEj`X'_fq`Y' == 1
		drop mean_fq`Y'_FEj`X'
	}
}
drop w1 w1_wins wh1 transfer_ny transfer_mtp lnw1_restr lnw1_wins_restr vallazon nap jnap fogvisz1 SLdays FEj FEj_wins n

* back to one row per original row: the offset-0 copy
keep if tempt==0
drop ev defl t tempt

* add firm fixed effect and main wage variable in the months after the accident - m1 m2 m3 ... m12

* twelve copies per row at 1..12 months after maxt_clue (tmerge_t where maxt_clue is missing)
gen n = _n
expand 12
egen t = seq(), by(n)
replace t = cond(maxt_clue==., tmerge_t, maxt_clue)+t

merge m:1 anon t using "$in/admin3_alap.dta", keepusing(vallazon1 w1 wh1 transfer_mtp transfer_ny nap jnap fogvisz1) keep(master match) nogenerate
rename vallazon1 vallazon
merge m:1 vallazon using "$out_AKM/FEj_full_Jan2023.dta", keepusing(vall_fix) keep(master match) nogenerate
rename vall_fix FEj
merge m:1 anon t using "$in/admin3_passziv.dta", keepusing(passziv_tip1) keep(master match) nogenerate
rename passziv_tip1 SLdays

* EMP==1: firm id present, positive wage or hours, SLdays below 6 or missing, fogvisz1==201
gen EMP = 1 if fogvisz1==201 & vallazon!=. & (SLdays<6 | SLdays==.) & ((w1>0 & w1!=.) | (wh1>0 & wh1!=.))

* winsorized firm FE: 1st/99th percentiles over distinct (vallazon, FEj) pairs seen with EMP==1
preserve
	keep if EMP==1
	keep vallazon FEj
	duplicates drop
	foreach p in 1 99 {
		egen FEj`p' = pctile(FEj), p(`p')
	}
	gen FEj_wins = FEj
	replace FEj_wins = FEj1 if FEj_wins<FEj1 & FEj_wins!=0
	replace FEj_wins = FEj99 if FEj_wins>FEj99 & FEj_wins!=.
	keep vallazon FEj_wins
	tempfile FEjwins
	save `FEjwins'
restore
merge m:1 vallazon using `FEjwins', keep(master match) nogenerate

* create a winsorized version of the wage variable, only for the EMP=1
* (1st/99th percentiles of w1 within month t among EMP==1 rows)
gen w1_wins = w1
foreach p in 1 99 {
	egen wp`p' = pctile(w1) if EMP==1, by(t) p(`p')
}
replace w1_wins = wp1 if EMP==1 & w1_wins!=0 & w1_wins<wp1
replace w1_wins = wp99 if EMP==1 & w1_wins!=. & w1_wins>wp99
drop wp1 wp99

* year of month t and the yearly deflator (2003 = 1; years outside 2003-2017 stay missing)
gen ev = 2003+int((t-1)/12)
gen defl = .
local yr = 2003
foreach idx in 1 1.067991144 1.106443719 1.149586344 1.241552086 1.317292007 1.372611279 1.439874155 1.496329527 1.581158238 1.608512002 1.604987182 1.603851084 1.610259846 1.648100676 {
	replace defl = `idx' if ev==`yr'
	local ++yr
}

* censoring hours worked to the range 20-40
replace wh1 = 20 if wh1<20
replace wh1 = 40 if wh1>40 & wh1!=.

* log deflated wage over (wh1*30)/7; missing unless EMP==1 and neither transfer code applies
foreach s in "" _wins {
	gen lnw1`s'_restr = log((w1`s'/defl)/((wh1*30)/7)) if transfer_mtp!=401 & transfer_ny!=203 & EMP==1
}
drop EMP w1 w1_wins wh1 transfer_ny transfer_mtp vallazon nap jnap fogvisz1 SLdays ev defl

* months since maxt_clue (tmerge_t where maxt_clue is missing); becomes the wide suffix
gen time = t-cond(maxt_clue==., tmerge_t, maxt_clue)

drop t
* back to one row per n with one column per month 1..12
reshape wide FEj FEj_wins lnw1_restr lnw1_wins_restr, i(n) j(time)
forvalues m = 1/12 {
	foreach stub in FEj FEj_wins lnw1_restr lnw1_wins_restr {
		rename `stub'`m' `stub'_m`m'
	}
}

* restore the original month under its own name and rebuild the year from tmerge_t
rename t_pre t

gen ev = 2003+int((tmerge_t-1)/12)

drop n

* feor1_2003_4_t -> feor1_2003_t, feor1_2008_4_t -> feor1_2008_t
rename feor1_*_4_t feor1_*_t

* column order of the output file (variables not listed keep their place after these)
order anon tmerge_t ev t d maxt_clue mint_clu flagEMPml1 flagEMPm0 diff_year treated control ferfi kor ///
	vallazon_t w1_t w1_wins_t wh1_t fogvisz1_t feor1_h2_t occupation teaor1_h1 ///
	logmean_w_pre_imp FO_pre_imp size_pre_imp flag_missingsize_pre flag_missingFO_pre flag_missinglogmean_w_pre ///
	FEi_imp flag_missingFEi mean_lag_w FEj_l*_imp flag_missingFEj_l* ///
	A_12mo B_12mo C_12mo D_12mo G_12mo H_12mo J_12mo L_12mo M_12mo N_12mo P_12mo R_12mo S_12mo V_12mo log_health_12mo ///
	lnw1_restr_t0 lnw1_restr_t1 lnw1_restr_t2 lnw1_restr_t3 ///
	lnw1_wins_restr_t0 lnw1_wins_restr_t1 lnw1_wins_restr_t2 lnw1_wins_restr_t3 ///
	FEj_t0 FEj_t1 FEj_t2 FEj_t3 mean_tfp_wr_pre_imp flag_missingmean_tfp_wr_pre ///
	feor1_2003_t feor1_2008_t feor1_2003_t1 feor1_2008_t1 feor1_2003_t2 feor1_2008_t2 feor1_2003_t3 feor1_2008_t3 ///
	FO_t FO_t1 FO_t2 FO_t3 mean_tfp_wr_t mean_tfp_wr_t1 mean_tfp_wr_t2 mean_tfp_wr_t3 ///
	wh1_t0 wh1_t1 wh1_t2 wh1_t3 ///
	lnw1_totw_t0 lnw1_totw_t1 lnw1_totw_t2 lnw1_totw_t3 ///
	lnw1_adj_t0 lnw1_adj_t1 lnw1_adj_t2 lnw1_adj_t3 ///
	lnw1_nap_t0 lnw1_nap_t1 lnw1_nap_t2 lnw1_nap_t3 ///
	lnw1_jnap_t0 lnw1_jnap_t1 lnw1_jnap_t2 lnw1_jnap_t3 ///
	FEj_m* lnw1_restr_m* ///
	lnw1_wins_totw_t0 lnw1_wins_totw_t1 lnw1_wins_totw_t2 lnw1_wins_totw_t3 ///
	lnw1_wins_adj_t0 lnw1_wins_adj_t1 lnw1_wins_adj_t2 lnw1_wins_adj_t3 ///
	lnw1_wins_nap_t0 lnw1_wins_nap_t1 lnw1_wins_nap_t2 lnw1_wins_nap_t3 ///
	lnw1_wins_jnap_t0 lnw1_wins_jnap_t1 lnw1_wins_jnap_t2 lnw1_wins_jnap_t3 ///
	lnw1_wins_restr_m* vallazon_t1 vallazon_t2 vallazon_t3 feor1_h2_t1 feor1_h2_t2 feor1_h2_t3 ///
	FEj_wins_t0 FEj_wins_t1 FEj_wins_t2 FEj_wins_t3 FEj_wins_l*_imp flag_missingFEj_wins_l* FEj_wins_m* ///
	mean_lq?_w mean_fq?_w mean_lq?_w_wins mean_fq?_w_wins ///
	mean_lq?_FEj_imp mean_fq?_FEj_imp mean_lq?_FEj_wins_imp mean_fq?_FEj_wins_imp ///
	flag_missingFEj_lq? flag_missingFEj_fq? flag_missingFEj_wins_lq? flag_missingFEj_wins_fq?

* every control row must carry d==1
assert d==1 if control==1
summarize
* no replace option: the save stops with an error if the file already exists
save "$out/data_for_futures_regs_with_d_2_6_20240508.dta"

* treated observations identified by anon if treated==1 (each anon can have at most one t in which she is treated)
* control observations identified by id if treated==0 (one anon can serve as a control in multiple t-s)
